RNA was isolated and sequenced from the various cell types.
sample_table <- readxl::read_xlsx('Key.xlsx')
sample_table <- data.frame(sample_table)
rownames(sample_table) <- sample_table$INDEXED.SAMPLE.NAME
sample_table$INDEXED.SAMPLE.NAME <-NULL
sample_table$GROUP.NAME <- c("D_StemB","D_StemB","D_StemB","D_ProB","D_ProB","D_ProB","D_ProB","R_StemB_StemB","R_StemB_StemB","R_StemB_StemB","R_StemB_StemB","R_StemB_StemB","R_StemB_ProB","R_StemB_ProB","R_StemB_ProB","R_StemB_ProB","R_ProB_ProB","R_ProB_ProB","R_ProB_ProB","R_ProB_ProB","R_ProB_ProB","R_ProB_StemB","R_ProB_StemB","R_ProB_StemB","R_ProB_StemB","R_ProB_StemB")
sample_table<-rownames_to_column(sample_table,"File.ID")
datatable(sample_table) %>%
formatStyle("GROUP.NAME", backgroundColor = styleEqual(c("D_StemB","D_ProB","R_StemB_StemB","R_StemB_ProB","R_ProB_ProB","R_ProB_StemB"), c("#EAD3BF","#AA9486", "#B6854D","#9986A5","#79402E","#CC79A7")))
# big_counts <- paste0("myriad/",rownames(sample_table),"/replicate_1/qc/",rownames(sample_table), "_QC.summary.txt/QC.geneCounts.formatted.for.DESeq.txt.gz") %>% map(gzfile) %>% map(read.table, stringsAsFactors = FALSE, sep = "\t") %>% reduce(left_join, by = "V1")
#
# colnames(big_counts) <- c("ensgene", rownames(sample_table))
# rownames(big_counts) <- big_counts$ensgene
# big_counts$ensgene <- NULL
# big_counts <- big_counts[!is.na(rowSums(big_counts)),]
# saveRDS(big_counts,"big_counts.rds")
big_counts <- read.table('myriad/temp2',stringsAsFactors = FALSE, header = TRUE)
count_matrix <- as.matrix(big_counts)
colnames(count_matrix) <- rownames(sample_table)
rownames(count_matrix) <- rownames(big_counts)
sce <- SingleCellExperiment(list(counts = count_matrix),
colData = sample_table)
sce <- sce[rowSums(counts(sce)) > 0,]
sce <- calculateQCMetrics(sce)
## prepare total count and total features data
my_df <- data.frame("sample_id" = colnames(sce), "total_counts" = sce$total_counts,
"total_features" = sce$total_features_by_counts )
ggplot(my_df,aes(total_counts)) + geom_histogram(bins = 10) +
theme_minimal() + ggtitle("Histogram of total counts") + ylab("number of samples") + xlab("total counts")
ggplot(my_df,aes(total_features)) + geom_histogram(bins = 10) +
theme_minimal() + ggtitle("Histogram of total features") + ylab("number of samples") + xlab("total features")
dds <- DESeqDataSetFromMatrix(big_counts, colData = sample_table, design = ~GROUP.NAME)
my_vst <- vst(dds)
pcaData <- DESeq2::plotPCA(my_vst, intgroup =c("DISEASE.STAGE","INJECTED.POPULATION"),returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData,aes(PC1,PC2,colour = DISEASE.STAGE, shape = INJECTED.POPULATION)) + geom_point(size = 3 ) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) + geom_text(data=subset(pcaData,PC1 > 50), aes(PC1,PC2,label = name),nudge_x = -16) +
theme_minimal()
big_counts <- big_counts[,grep("VT40_N708_S502",colnames(big_counts),invert = T)]
sample_table <- sample_table[grep("VT40_N708_S502", sample_table$File.ID,invert = T),]
dds <- DESeqDataSetFromMatrix(big_counts, colData = sample_table, design = ~GROUP.NAME)
my_vst <- vst(dds)
pcaData <- DESeq2::plotPCA(my_vst, intgroup =c("DISEASE.STAGE","INJECTED.POPULATION"),returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData,aes(PC1,PC2,colour = DISEASE.STAGE, shape = INJECTED.POPULATION)) + geom_point(size = 3 ) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) + geom_text(data=subset(pcaData,PC1 > 50), aes(PC1,PC2,label = name),nudge_x = -16) +
theme_minimal()
hallmark <- gmtPathways("~/genome_apps/GSEA/h.all.v6.2.symbols.gmt")
pca1_gsea = function(object) {
# calculate the variance for each gene
rv <- rowVars(assay(object))
# select the 1000 top genes by variance
select <- order(rv, decreasing=TRUE)[seq_len(min(2000, length(rv)))]
# perform a PCA on the data in assay(x) for the selected genes
pca <- prcomp(t(assay(object)[select,]))
PCA1_contrib <- sort(pca$rotation[,1], decreasing = TRUE )
PCA1_contrib <- data.frame("ensgene" = names(PCA1_contrib), PCA1_contrib)
PCA1_contrib <- left_join(PCA1_contrib,grch38,by="ensgene")
PCA1_contrib <- PCA1_contrib[complete.cases(PCA1_contrib$symbol),]
for_gsea <- PCA1_contrib$PCA1_contrib
names(for_gsea) <- PCA1_contrib$symbol
fgseaRes <- fgsea(hallmark, for_gsea, nperm=10000, maxSize = 500)
fgseaRes <- fgseaRes[fgseaRes$padj < 0.05,]
plotGseaTable(hallmark[fgseaRes$pathway], for_gsea, fgseaRes, gseaParam = 0.4)
return(PCA1_contrib)
}
pc1_res<-pca1_gsea(my_vst)
datatable(pc1_res,options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
pca2_gsea = function(object) {
# calculate the variance for each gene
rv <- rowVars(assay(object))
# select the 1000 top genes by variance
select <- order(rv, decreasing=TRUE)[seq_len(min(2000, length(rv)))]
# perform a PCA on the data in assay(x) for the selected genes
pca <- prcomp(t(assay(object)[select,]))
PCA1_contrib <- sort(pca$rotation[,2], decreasing = TRUE )
PCA1_contrib <- data.frame("ensgene" = names(PCA1_contrib), PCA1_contrib)
PCA1_contrib <- left_join(PCA1_contrib,grch38,by="ensgene")
PCA1_contrib <- PCA1_contrib[complete.cases(PCA1_contrib$symbol),]
for_gsea <- PCA1_contrib$PCA1_contrib
names(for_gsea) <- PCA1_contrib$symbol
fgseaRes <- fgsea(hallmark, for_gsea, nperm=10000, maxSize = 500)
fgseaRes <- fgseaRes[fgseaRes$padj < 0.05,]
plotGseaTable(hallmark[fgseaRes$pathway], for_gsea, fgseaRes, gseaParam = 0.4)
return(PCA1_contrib)
}
pca2_res <- pca2_gsea(my_vst)
datatable(pca2_res,options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
library(gtools)
my_combinations <- data.frame(combinations(n=6,r=2,v=unique(sample_table$GROUP.NAME)),stringsAsFactors = F)
colnames(my_combinations) <- c("first","second")
datatable(my_combinations)
# deseq <- DESeq(dds)
# saveRDS(deseq,"vt40_deseq.rds")
deseq <- readRDS("vt40_deseq.rds")
DGE <- function(c1,c2){
res <- results(deseq,contrast = c("GROUP.NAME", c1, c2))
resOrdered <- res[order(res$padj),]
resOrdered <- resOrdered[complete.cases(resOrdered),]
resOrdered <- data.frame(resOrdered)
resOrdered <- left_join(rownames_to_column(resOrdered,var = "ensgene"), grch38, by = "ensgene") %>% select(symbol,log2FoldChange,padj)
res_for_table <- resOrdered %>% dplyr::filter(padj < 0.1)
return(list(resOrdered=resOrdered,res_for_table=res_for_table))
}
my_res<-DGE(my_combinations$first[1],my_combinations$second[1])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[1]," vs ",my_combinations$second[1]),
options = list(dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel')))
plot_gsea <- function(res){
res <- res[complete.cases(res),]
res$fcSign <- sign(res$log2FoldChange)
res$logP <- -log10(res$padj)
res$metric <- res$logP/res$fcSign
y<-res[,c("symbol", "metric")]
geneList <- y$metric
names(geneList) <- y$symbol
geneList <- geneList[order(geneList, decreasing = T)]
fgseaRes <- fgsea(hallmark, geneList, nperm=10000, maxSize = 500)
fgseaRes <- fgseaRes[fgseaRes$padj < 0.05,]
plotGseaTable(hallmark[fgseaRes$pathway], geneList, fgseaRes, gseaParam = 0.4)
}
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[2],my_combinations$second[2])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[2]," vs ",my_combinations$second[2]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[3],my_combinations$second[3])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[3]," vs ",my_combinations$second[3]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[4],my_combinations$second[4])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[4]," vs ",my_combinations$second[4]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[5],my_combinations$second[5])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[5]," vs ",my_combinations$second[5]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[6],my_combinations$second[6])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[6]," vs ",my_combinations$second[6]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[7],my_combinations$second[7])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[7]," vs ",my_combinations$second[7]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[8],my_combinations$second[8])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[8]," vs ",my_combinations$second[8]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[9],my_combinations$second[9])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[9]," vs ",my_combinations$second[9]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[10],my_combinations$second[10])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[10]," vs ",my_combinations$second[10]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[11],my_combinations$second[11])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[11]," vs ",my_combinations$second[11]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[12],my_combinations$second[12])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[12]," vs ",my_combinations$second[12]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[13],my_combinations$second[13])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[13]," vs ",my_combinations$second[13]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[14],my_combinations$second[14])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[14]," vs ",my_combinations$second[14]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)
my_res<-DGE(my_combinations$first[15],my_combinations$second[15])
datatable(my_res$res_for_table, extensions = 'Buttons',
caption = paste(my_combinations$first[15]," vs ",my_combinations$second[15]),
options = list(dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel')))
plot_gsea(my_res$resOrdered)